The goal of Cats vs Dog classification using deep learning, particularly with the addition of VGG16, is to build a model capable of accurately distinguishing between images of cats and dogs. This involves leveraging the VGG16 architecture by fine-tuning it on Cats vs Dog dataset. Ultimately, the aim is to create a robust and accurate model for automated classification of cat and dog images.
In the context of Cats vs Dog classification using deep learning, adding VGG16 refers to leveraging the VGG16 architecture as a pre-trained model for feature extraction. VGG16 is a popular convolutional neural network architecture that has been pre-trained on the ImageNet dataset, making it capable of recognizing a wide range of visual features.
Fine-tune: Fine-tune the VGG16 model on our Cats vs Dog dataset by updating its weights during training to better adapt to our specific task.
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix, classification_report, precision_recall_curve
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.applications import VGG16
from tensorflow.keras.utils import image_dataset_from_directory
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.layers import Dense, Conv2D, MaxPooling2D, Flatten, BatchNormalization, Dropout
from tensorflow.keras.layers import RandomFlip, RandomRotation, RandomZoom, RandomBrightness, Rescaling, RandomTranslation, RandomWidth, RandomHeight
import pandas as pd
import tensorflow as tf
import os
import shutil
from PIL import Image
import random
Train = "C:\\Users\\simranjeet\\Desktop\\cscn8010\\train\\train"
Test = "C:\\Users\\simranjeet\\Desktop\\cscn8010\\test1\\test1"
train_img_list = os.listdir(Train)
test_img_list = os.listdir(Test)
print("Length of Train Images: ",len(train_img_list))
print("Length of Test Images: ",len(test_img_list))
Length of Train Images: 25000 Length of Test Images: 11273
parent_path = "C:\\Users\\simranjeet\\Desktop\\cscn8010\\cat_and_dog"
try:
os.mkdir(parent_path)
for path in ['training', 'validation']:
path = os.path.join(parent_path,path)
os.mkdir(path)
print(path,'is created')
for category in ['cats','dogs']:
new_path = os.path.join(path, category)
os.mkdir(new_path)
print(new_path,'is created')
except OSError:
print('The Folder exists already')
C:\Users\simranjeet\Desktop\cscn8010\cat_and_dog\training is created C:\Users\simranjeet\Desktop\cscn8010\cat_and_dog\training\cats is created C:\Users\simranjeet\Desktop\cscn8010\cat_and_dog\training\dogs is created C:\Users\simranjeet\Desktop\cscn8010\cat_and_dog\validation is created C:\Users\simranjeet\Desktop\cscn8010\cat_and_dog\validation\cats is created C:\Users\simranjeet\Desktop\cscn8010\cat_and_dog\validation\dogs is created
cats_images = [img for img in train_img_list if img.startswith('cat')]
dogs_images = [img for img in train_img_list if img.startswith('dog')]
len(cats_images), len(dogs_images)
(12500, 12500)
# shuffle the lists of images
np.random.shuffle(cats_images)
np.random.shuffle(dogs_images)
# split the lists into train and validation
threshold = int(0.9 * len(cats_images))
train_cats, validation_cats = np.split(cats_images, [threshold])
train_dogs, validation_dogs = np.split(dogs_images, [threshold])
# define images destination folders
train_cats_path = r'C:\Users\simranjeet\Desktop\cscn8010\cat_and_dog\training\cats'
train_dogs_path = r'C:\Users\simranjeet\Desktop\cscn8010\cat_and_dog\training\dogs'
validation_cats_path = r'C:\Users\simranjeet\Desktop\cscn8010\cat_and_dog\validation\cats'
validation_dogs_path = r'C:\Users\simranjeet\Desktop\cscn8010\cat_and_dog\validation\dogs'
# Move images to training directories
for i in range(threshold):
src_path_cats = os.path.join(Train, train_cats[i])
src_path_dogs = os.path.join(Train, train_dogs[i])
dest_path_cats = os.path.join(train_cats_path, train_cats[i])
dest_path_dogs = os.path.join(train_dogs_path, train_dogs[i])
shutil.move(src_path_cats, dest_path_cats)
shutil.move(src_path_dogs, dest_path_dogs)
# Move images to validation directories
for i in range(threshold, len(cats_images)):
src_path_cats = os.path.join(Train, validation_cats[i - threshold])
src_path_dogs = os.path.join(Train, validation_dogs[i - threshold])
dest_path_cats = os.path.join(validation_cats_path, validation_cats[i - threshold])
dest_path_dogs = os.path.join(validation_dogs_path, validation_dogs[i - threshold])
shutil.move(src_path_cats, dest_path_cats)
shutil.move(src_path_dogs, dest_path_dogs)
print('Number of cats in training directory: ', len(os.listdir(train_cats_path)))
print('Number of cats in validation directory: ', len(os.listdir(validation_cats_path)))
print('Number of dogs in training directory: ', len(os.listdir(train_dogs_path)))
print('Number of dogs in validation directory: ', len(os.listdir(validation_dogs_path)))
Number of cats in training directory: 11250 Number of cats in validation directory: 1250 Number of dogs in training directory: 11250 Number of dogs in validation directory: 1250
fig, ax = plt.subplots(2,4,figsize=(20,12))
for i in range(8):
img_filename = random.choice(train_cats)
img_path = os.path.join(train_cats_path, img_filename)
img = Image.open(img_path)
ax[i//4, i%4].imshow(img)
ax[i//4, i%4].axis('off')
fig, ax = plt.subplots(2,4,figsize=(20,12))
for i in range(8):
img_filename = random.choice(train_dogs)
img_path = os.path.join(train_dogs_path, img_filename)
img = Image.open(img_path)
ax[i//4, i%4].imshow(img)
ax[i//4, i%4].axis('off')
BATCH_SIZE = 32
WIDTH = 150
HEIGHT = 150
CHANNELS = 3
# Define paths to training and validation directories
train_dir = r'C:\Users\simranjeet\Desktop\cscn8010\cat_and_dog\training'
validation_dir = r'C:\Users\simranjeet\Desktop\cscn8010\cat_and_dog\validation'
# training data generator
train_ds = image_dataset_from_directory(
train_dir,
labels='inferred',
label_mode='binary',
color_mode='rgb',
image_size=(HEIGHT,WIDTH),
batch_size=BATCH_SIZE
)
# validation data generator
validation_ds = image_dataset_from_directory(
validation_dir,
labels='inferred',
label_mode='binary',
color_mode='rgb',
image_size=(HEIGHT,WIDTH),
batch_size=BATCH_SIZE
)
Found 22500 files belonging to 2 classes. Found 2500 files belonging to 2 classes.
class_names = train_ds.class_names
plt.figure(figsize=(20, 8))
for images, labels in train_ds.take(1):
for i in range(8):
ax = plt.subplot(2, 4, i + 1)
plt.imshow(images[i].numpy().astype("uint8"))
plt.title(class_names[int(labels[i])][:3])
plt.axis("off")
class_names = validation_ds.class_names
plt.figure(figsize=(20, 8))
for images, labels in validation_ds.take(1):
for i in range(8):
ax = plt.subplot(2, 4, i + 1)
plt.imshow(images[i].numpy().astype("uint8"))
plt.title(class_names[int(labels[i])][:3])
plt.axis("off")
# Count occurrences of each class
class_counts = train_ds.class_names
unique_classes, counts = np.unique(class_counts, return_counts=True)
# Plot count plot
plt.figure(figsize=(8, 5))
plt.bar(class_names, counts, color='skyblue')
plt.title('Class Distribution')
plt.xlabel('Class')
plt.ylabel('Count')
plt.show()
cats and dogs with an equal number of instances for each class. This is balanced.
AUTOTUNE = tf.data.experimental.AUTOTUNE
train_ds = train_ds.prefetch(buffer_size=AUTOTUNE)
# define callbacks
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
early_stopping = EarlyStopping(monitor='val_loss', patience=5, min_delta=0.001, restore_best_weights=True)
# CNN architecture
cnn = Sequential(
[
Rescaling(1./255, input_shape=(HEIGHT, WIDTH, CHANNELS)),
Conv2D(32, (3,3), activation='relu'),
MaxPooling2D(2,2),
Conv2D(64, (3,3), activation='relu'),
MaxPooling2D(2,2),
Conv2D(128, (3,3), activation='relu'),
MaxPooling2D(2,2),
Flatten(),
Dense(128, activation='relu'),
Dense(64, activation='relu'),
Dense(1, activation='sigmoid')
]
)
# compile the model
cnn.compile(optimizer='adamax', loss='binary_crossentropy', metrics=['accuracy'])
# model summary
cnn.summary()
WARNING:tensorflow:From c:\Users\simranjeet\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\backend.py:873: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.
WARNING:tensorflow:From c:\Users\simranjeet\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\layers\pooling\max_pooling2d.py:161: The name tf.nn.max_pool is deprecated. Please use tf.nn.max_pool2d instead.
WARNING:tensorflow:From c:\Users\simranjeet\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\optimizers\__init__.py:309: The name tf.train.Optimizer is deprecated. Please use tf.compat.v1.train.Optimizer instead.
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
rescaling (Rescaling) (None, 150, 150, 3) 0
conv2d (Conv2D) (None, 148, 148, 32) 896
max_pooling2d (MaxPooling2 (None, 74, 74, 32) 0
D)
conv2d_1 (Conv2D) (None, 72, 72, 64) 18496
max_pooling2d_1 (MaxPoolin (None, 36, 36, 64) 0
g2D)
conv2d_2 (Conv2D) (None, 34, 34, 128) 73856
max_pooling2d_2 (MaxPoolin (None, 17, 17, 128) 0
g2D)
flatten (Flatten) (None, 36992) 0
dense (Dense) (None, 128) 4735104
dense_1 (Dense) (None, 64) 8256
dense_2 (Dense) (None, 1) 65
=================================================================
Total params: 4836673 (18.45 MB)
Trainable params: 4836673 (18.45 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
history = cnn.fit(train_ds, validation_data=validation_ds, epochs=10, callbacks=[early_stopping])
Epoch 1/10 704/704 [==============================] - 80s 114ms/step - loss: 0.5531 - accuracy: 0.7115 - val_loss: 0.4595 - val_accuracy: 0.7864 Epoch 2/10 704/704 [==============================] - 80s 114ms/step - loss: 0.4481 - accuracy: 0.7892 - val_loss: 0.4302 - val_accuracy: 0.8064 Epoch 3/10 704/704 [==============================] - 81s 115ms/step - loss: 0.3816 - accuracy: 0.8284 - val_loss: 0.3852 - val_accuracy: 0.8236 Epoch 4/10 704/704 [==============================] - 80s 113ms/step - loss: 0.3247 - accuracy: 0.8583 - val_loss: 0.3675 - val_accuracy: 0.8372 Epoch 5/10 704/704 [==============================] - 80s 114ms/step - loss: 0.2652 - accuracy: 0.8870 - val_loss: 0.3708 - val_accuracy: 0.8408 Epoch 6/10 704/704 [==============================] - 80s 113ms/step - loss: 0.2089 - accuracy: 0.9155 - val_loss: 0.3740 - val_accuracy: 0.8388 Epoch 7/10 704/704 [==============================] - 81s 115ms/step - loss: 0.1523 - accuracy: 0.9414 - val_loss: 0.4150 - val_accuracy: 0.8476 Epoch 8/10 704/704 [==============================] - 80s 114ms/step - loss: 0.1125 - accuracy: 0.9562 - val_loss: 0.4829 - val_accuracy: 0.8424 Epoch 9/10 704/704 [==============================] - 80s 114ms/step - loss: 0.0713 - accuracy: 0.9744 - val_loss: 0.5712 - val_accuracy: 0.8512
def plot_history(history):
epochs_range = range(1, len(history.history['loss'])+1)
# plot accuracy curves
plt.figure(figsize=(12, 5))
plt.subplot(1, 2, 1)
plt.plot(epochs_range, history.history['accuracy'], label='Training Accuracy')
plt.plot(epochs_range, history.history['val_accuracy'], label='Validation Accuracy')
plt.legend(loc='lower right')
plt.title('Training and Validation Accuracy')
# plot loss curves
plt.subplot(1, 2, 2)
plt.plot(epochs_range, history.history['loss'], label='Training Loss')
plt.plot(epochs_range, history.history['val_loss'], label='Validation Loss')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss')
plt.show()
plot_history(history)
print("Training Accuracy: ", cnn.evaluate(train_ds, verbose=None)[1])
print("Validation Accuracy: ", cnn.evaluate(validation_ds, verbose=None)[1])
Training Accuracy: 0.88919997215271 Validation Accuracy: 0.8371999859809875
from sklearn.metrics import classification_report
# Predict labels for validation dataset
y_pred = cnn.predict(validation_ds).flatten() > 0.5 # Assuming binary classification with threshold 0.5
y_true = np.concatenate([y for _, y in validation_ds], axis=0)
# Generate classification report
print("Classification Report for Validation Dataset:")
print(classification_report(y_true, y_pred))
79/79 [==============================] - 3s 34ms/step
Classification Report for Validation Dataset:
precision recall f1-score support
0.0 0.50 0.46 0.48 1250
1.0 0.50 0.54 0.52 1250
accuracy 0.50 2500
macro avg 0.50 0.50 0.50 2500
weighted avg 0.50 0.50 0.50 2500
The current CNN model demonstrates promising performance. Here's a brief analysis:
The model shows good generalization to unseen data, as the validation accuracy is relatively close to the training accuracy. However, there is a noticeable accuracy gap between training and validation, indicating potential overfitting. We got Tranning accuracy 0.88919997215271 and Validation accuracy 0.8371999859809875
Precision for cats (label 0) is 0.50, indicating half of the images predicted as cats are correct. Precision for dogs (label 1) is 0.50, suggesting half of the images predicted as dogs are correct. Recall for cats (label 0) is 0.46, meaning 46% of actual cats are correctly identified by the model. Recall for dogs (label 1) is 0.54, indicating 54% of actual dogs are correctly identified by the model. F1-score for cats (label 0) is 0.48, representing a harmonic mean of precision and recall for cat predictions. F1-score for dogs (label 1) is 0.52, representing a harmonic mean of precision and recall for dog predictions. The accuracy of the model on the validation dataset is 0.50, meaning it correctly classifies half of the images. The macro average precision, recall, and F1-score are all 0.50, representing an average across both classes.
cnn.save("first_model.h5")
c:\Users\simranjeet\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\engine\training.py:3103: UserWarning: You are saving your model as an HDF5 file via `model.save()`. This file format is considered legacy. We recommend using instead the native Keras format, e.g. `model.save('my_model.keras')`.
saving_api.save_model(
from keras.models import load_model
loaded_model_cnn = load_model("first_model.h5")
# Define images and ids lists
from keras.preprocessing import image
test_images = []
test_ids = []
file_names = os.listdir(Test)
# This loop converts the images to tensors and extracts image ids
for file_name in file_names:
img_path = os.path.join(Test, file_name)
img_id = file_name.replace('.jpg', '')
img = image.load_img(img_path, target_size=(WIDTH, HEIGHT))
img_array = image.img_to_array(img)
img_array = np.expand_dims(img_array, axis=0)
test_images.append(img_array)
test_ids.append(img_id)
test_images = np.vstack(test_images)
len(test_images)
11273
There are 11273 images in test set we took 25 images for test predictions, below is the result of test images prediction which includes:
ID --> Image_Name Label --> Class of images['Cat'(0),'Dog'(1)]
test_predictions = loaded_model_cnn.predict(test_images[:25])
test_predictions = (test_predictions > 0.5).astype(int)
results = pd.DataFrame({'id': test_ids[:25], 'label': test_predictions.reshape(-1)})
results.head()
1/1 [==============================] - 0s 45ms/step
| id | label | |
|---|---|---|
| 0 | 1 | 1 |
| 1 | 10 | 0 |
| 2 | 100 | 1 |
| 3 | 1000 | 1 |
| 4 | 10000 | 1 |
In the pursuit of achieving even higher performance, we've incorporated transfer learning and fine tune model using the VGG16 architecture. Here are the key steps and details:
Instantiation of the Base Model: We utilize the pre-trained VGG16 model with weights from ImageNet and exclude the top classification layers. Freezing CNN Layers: The layers of the VGG16 base model are frozen, preventing them from being updated during training. Model Architecture: A new Sequential model is constructed on top of the frozen VGG16 base. This includes flattening the output and adding densely connected layers. Compilation: The model is compiled using the 'adamax' optimizer and binary crossentropy loss for binary classification. Model Architecture:
The VGG16 base model followed by three dense layers with dropout for classification. The dense layers facilitate the model in learning task-specific patterns while leveraging pre-trained features from VGG16. Benefits of Transfer Learning:
Knowledge Transfer: Leveraging pre-trained features from VGG16 can benefit the model in recognizing complex patterns. Reduced Training Time: Utilizing pre-trained weights reduces the need for extensive training on the new dataset.
# instantiate the base model
base_model = VGG16(weights='imagenet', include_top=False, input_shape=(WIDTH, HEIGHT, CHANNELS))
# Freezing the CNN layers
for layer in base_model.layers:
layer.trainable = False
# Base model summary
base_model.summary()
Model: "vgg16"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_1 (InputLayer) [(None, 150, 150, 3)] 0
block1_conv1 (Conv2D) (None, 150, 150, 64) 1792
block1_conv2 (Conv2D) (None, 150, 150, 64) 36928
block1_pool (MaxPooling2D) (None, 75, 75, 64) 0
block2_conv1 (Conv2D) (None, 75, 75, 128) 73856
block2_conv2 (Conv2D) (None, 75, 75, 128) 147584
block2_pool (MaxPooling2D) (None, 37, 37, 128) 0
block3_conv1 (Conv2D) (None, 37, 37, 256) 295168
block3_conv2 (Conv2D) (None, 37, 37, 256) 590080
block3_conv3 (Conv2D) (None, 37, 37, 256) 590080
block3_pool (MaxPooling2D) (None, 18, 18, 256) 0
block4_conv1 (Conv2D) (None, 18, 18, 512) 1180160
block4_conv2 (Conv2D) (None, 18, 18, 512) 2359808
block4_conv3 (Conv2D) (None, 18, 18, 512) 2359808
block4_pool (MaxPooling2D) (None, 9, 9, 512) 0
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_1 (InputLayer) [(None, 150, 150, 3)] 0
block1_conv1 (Conv2D) (None, 150, 150, 64) 1792
block1_conv2 (Conv2D) (None, 150, 150, 64) 36928
block1_pool (MaxPooling2D) (None, 75, 75, 64) 0
block2_conv1 (Conv2D) (None, 75, 75, 128) 73856
block2_conv2 (Conv2D) (None, 75, 75, 128) 147584
block2_pool (MaxPooling2D) (None, 37, 37, 128) 0
block3_conv1 (Conv2D) (None, 37, 37, 256) 295168
block3_conv2 (Conv2D) (None, 37, 37, 256) 590080
block3_conv3 (Conv2D) (None, 37, 37, 256) 590080
block3_pool (MaxPooling2D) (None, 18, 18, 256) 0
block4_conv1 (Conv2D) (None, 18, 18, 512) 1180160
block4_conv2 (Conv2D) (None, 18, 18, 512) 2359808
block4_conv3 (Conv2D) (None, 18, 18, 512) 2359808
block4_pool (MaxPooling2D) (None, 9, 9, 512) 0
block5_conv1 (Conv2D) (None, 9, 9, 512) 2359808
block5_conv2 (Conv2D) (None, 9, 9, 512) 2359808
block5_conv3 (Conv2D) (None, 9, 9, 512) 2359808
block5_pool (MaxPooling2D) (None, 4, 4, 512) 0
=================================================================
Total params: 14714688 (56.13 MB)
Trainable params: 0 (0.00 Byte)
Non-trainable params: 14714688 (56.13 MB)
_________________________________________________________________
# CNN architecture
model_vgg = Sequential(
[
base_model,
Flatten(),
Dense(512, activation='relu'),
Dropout(0.3),
Dense(256, activation='relu'),
Dropout(0.3),
Dense(1, activation='sigmoid')
]
)
# compile the model
model_vgg.compile(optimizer='adamax', loss='binary_crossentropy', metrics=['accuracy'])
# model summary
model_vgg.summary()
Model: "sequential_1"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
vgg16 (Functional) (None, 4, 4, 512) 14714688
flatten_1 (Flatten) (None, 8192) 0
dense_3 (Dense) (None, 512) 4194816
dropout (Dropout) (None, 512) 0
dense_4 (Dense) (None, 256) 131328
dropout_1 (Dropout) (None, 256) 0
dense_5 (Dense) (None, 1) 257
=================================================================
Total params: 19041089 (72.64 MB)
Trainable params: 4326401 (16.50 MB)
Non-trainable params: 14714688 (56.13 MB)
_________________________________________________________________
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
vgg16 (Functional) (None, 4, 4, 512) 14714688
flatten_1 (Flatten) (None, 8192) 0
dense_3 (Dense) (None, 512) 4194816
dropout (Dropout) (None, 512) 0
dense_4 (Dense) (None, 256) 131328
dropout_1 (Dropout) (None, 256) 0
dense_5 (Dense) (None, 1) 257
=================================================================
Total params: 19041089 (72.64 MB)
Trainable params: 4326401 (16.50 MB)
Non-trainable params: 14714688 (56.13 MB)
_________________________________________________________________
history_vgg = model_vgg.fit(train_ds, validation_data=validation_ds, epochs=10, callbacks=[early_stopping])
Epoch 1/10 704/704 [==============================] - 467s 662ms/step - loss: 0.9726 - accuracy: 0.9173 - val_loss: 0.2327 - val_accuracy: 0.9348 Epoch 2/10 704/704 [==============================] - 441s 626ms/step - loss: 0.2463 - accuracy: 0.9468 - val_loss: 0.1225 - val_accuracy: 0.9532 Epoch 3/10 704/704 [==============================] - 441s 627ms/step - loss: 0.1471 - accuracy: 0.9601 - val_loss: 0.1165 - val_accuracy: 0.9516 Epoch 4/10 704/704 [==============================] - 2621s 4s/step - loss: 0.1026 - accuracy: 0.9687 - val_loss: 0.1443 - val_accuracy: 0.9524 Epoch 5/10 704/704 [==============================] - 438s 622ms/step - loss: 0.0760 - accuracy: 0.9768 - val_loss: 0.1388 - val_accuracy: 0.9524 Epoch 6/10 704/704 [==============================] - 2234s 3s/step - loss: 0.0641 - accuracy: 0.9802 - val_loss: 0.1653 - val_accuracy: 0.9588 Epoch 7/10 704/704 [==============================] - 1018s 1s/step - loss: 0.0445 - accuracy: 0.9855 - val_loss: 0.1617 - val_accuracy: 0.9580 Epoch 8/10 704/704 [==============================] - 459s 652ms/step - loss: 0.0290 - accuracy: 0.9916 - val_loss: 0.2015 - val_accuracy: 0.9584
plot_history(history_vgg)
print("Training Accuracy: ", model_vgg.evaluate(train_ds, verbose=None)[1])
print("Validation Accuracy: ", model_vgg.evaluate(validation_ds, verbose=None)[1])
Training Accuracy: 0.9830666780471802 Validation Accuracy: 0.9516000151634216
Certainly! Here are concise points summarizing the improvements observed in training and test accuracies using VGG16 as transfer learning:
model_vgg.save("VGG16.h5")
from sklearn.metrics import classification_report
# Predict labels for validation dataset
y_pred = model_vgg.predict(validation_ds).flatten() > 0.5
y_true = np.concatenate([y for _, y in validation_ds], axis=0)
# Generate classification report
print("Classification Report for Validation Dataset:")
print(classification_report(y_true, y_pred))
from keras.models import load_model
loaded_model_vgg = load_model("VGG16.h5")
test_predictions = loaded_model_vgg.predict(test_images[:25])
test_predictions = (test_predictions > 0.5).astype(int)
results = pd.DataFrame({'id': test_ids[:25], 'label': test_predictions.reshape(-1)})
results.head()
1/1 [==============================] - 1s 1s/step
| id | label | |
|---|---|---|
| 0 | 1 | 1 |
| 1 | 10 | 0 |
| 2 | 100 | 0 |
| 3 | 1000 | 1 |
| 4 | 10000 | 1 |
The model trained using transfer learning with the VGG16 architecture has showcased exceptional performance.
Observations:
The model exhibits significantly improved accuracy, with both training and validation accuracies surpassing previous models. Transfer learning with VGG16 has effectively leveraged pre-trained features, allowing the model to excel in recognizing intricate patterns. Benefits of Transfer Learning:
Enhanced Generalization: Transfer learning facilitated the model in understanding complex image features. High Accuracy: The model achieved remarkable accuracies, indicating robust learning.
Fine-tuning the model by re-running the optimizer in the last layers to improve accuracy. When you use a smaller learning rate, you take smaller steps to adapt it a little more closely to the new data. In transfer learning, the way you achieve this is by unfreezing the layers at the end of the network, and then re-training your model on the final layers with a very low learning rate. Adapting your learning rate to go over these layers in smaller steps can yield more fine details - and higher accuracy.
The intuition for what's happening: when the network is in its earlier stages, it trains on low-level features, like edges. In the later layers, more complex, high-level features like wispy hair or pointy ears begin to emerge. For transfer learning, the low-level features can be kept the same, as they have common features for most images. When you add new data, you generally want the high-level features to adapt to it, which is rather like letting the network learn to detect features more related to your data.
To achieve this, just unfreeze the final layers and re-run the optimizer with a smaller learning rate, while keeping all the other layers frozen.
Where the final layers actually begin is a bit arbitrary. The important takeaway is that the later layers are the part of your network that contain the fine details (pointy ears, hairy tails) that are more specific to your problem.
# Freezing the CNN layers
from tensorflow.keras.optimizers import Adamax
for layer in base_model.layers[-10:]:
layer.trainable = True
# compile the model
model_vgg.compile(optimizer=Adamax(learning_rate=0.0001), loss='binary_crossentropy', metrics=['accuracy'])
history_vgg_fine_tuned = model_vgg.fit(train_ds, validation_data=validation_ds, epochs=10, callbacks=[early_stopping])
Epoch 1/10 704/704 [==============================] - 1030s 1s/step - loss: 0.1342 - accuracy: 0.9513 - val_loss: 0.0859 - val_accuracy: 0.9668 Epoch 2/10 704/704 [==============================] - 947s 1s/step - loss: 0.0543 - accuracy: 0.9794 - val_loss: 0.0704 - val_accuracy: 0.9736 Epoch 3/10 704/704 [==============================] - 878s 1s/step - loss: 0.0343 - accuracy: 0.9872 - val_loss: 0.0678 - val_accuracy: 0.9716 Epoch 4/10 704/704 [==============================] - 835s 1s/step - loss: 0.0176 - accuracy: 0.9938 - val_loss: 0.0871 - val_accuracy: 0.9716 Epoch 5/10 704/704 [==============================] - 838s 1s/step - loss: 0.0136 - accuracy: 0.9954 - val_loss: 0.0874 - val_accuracy: 0.9772 Epoch 6/10 704/704 [==============================] - 881s 1s/step - loss: 0.0090 - accuracy: 0.9962 - val_loss: 0.0817 - val_accuracy: 0.9744 Epoch 7/10 704/704 [==============================] - 949s 1s/step - loss: 0.0071 - accuracy: 0.9976 - val_loss: 0.1000 - val_accuracy: 0.9776 Epoch 8/10 704/704 [==============================] - 834s 1s/step - loss: 0.0055 - accuracy: 0.9980 - val_loss: 0.1166 - val_accuracy: 0.9752
plot_history(history_vgg_fine_tuned)
In above figure validation Accuracy and Training Accuracy is increasing with increase of no. of epochs. By early stopping it trainned for 8 epoch with 0.9752 as validation accuracy and 0.9980 as Training accuracy. Validation loss is decreasing at 8 epoch it will be 0.0055
model_vgg.save("VGG16_fine_tuned.h5")
c:\Users\simranjeet\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\engine\training.py:3103: UserWarning: You are saving your model as an HDF5 file via `model.save()`. This file format is considered legacy. We recommend using instead the native Keras format, e.g. `model.save('my_model.keras')`.
saving_api.save_model(
print("Training Accuracy: ", model_vgg.evaluate(train_ds, verbose=None)[1])
print("Validation Accuracy: ", model_vgg.evaluate(validation_ds, verbose=None)[1])
Training Accuracy: 0.996399998664856 Validation Accuracy: 0.9715999960899353
Training Accuracy: Increased from 88% to 99% with fine-tuning using Adamax optimizer. Test Accuracy: Improved from 83% to 97% with the same approach. Generalization: Enhanced ability to classify unseen data. Reduced Overfitting: Smaller gap between training and test accuracies. Model Robustness: More reliable and consistent predictions. Transfer Learning Efficacy: Demonstrates the effectiveness of using pre-trained models. Efficiency: Achieved higher accuracy with fewer training iterations.
We get impressive accuracy after fine tunning the model! After training the VGG16 transfer learning model, let's evaluate its performance on the validation dataset and visualize the results using a confusion matrix and classification report.
actual_labels = np.array([])
predicted_labels = np.array([])
# extract labels and model predictions
for images, labels in validation_ds:
labels = labels.numpy().reshape(-1).astype(int)
actual_labels = np.concatenate([actual_labels, labels])
predictions = model_vgg.predict(images, verbose=False).reshape(-1)
binary_predictions = (predictions > 0.5).astype(int)
predicted_labels = np.concatenate([predicted_labels, binary_predictions])
#classification report
print(classification_report(actual_labels, predicted_labels))
#confusion matrix
conf_mat = confusion_matrix(actual_labels, predicted_labels)
plt.figure(figsize=(5,4))
sns.heatmap(conf_mat, annot=True, fmt='d', cmap='Blues', xticklabels=['Cats', 'Dogs'], yticklabels=['Cats', 'Dogs']) #
plt.xlabel('Predicted'), plt.ylabel('Actual');
precision recall f1-score support
0.0 0.98 0.97 0.97 1250
1.0 0.97 0.98 0.97 1250
accuracy 0.97 2500
macro avg 0.97 0.97 0.97 2500
weighted avg 0.97 0.97 0.97 2500
The classification report shows that both classes, 0 and 1, achieved high precision, recall, and F1-score, resulting in an overall accuracy of 97% on the validation dataset, demonstrating strong performance in distinguishing between the two classes.
test_predictions = model_vgg.predict(test_images[:25])
test_predictions = (test_predictions > 0.5).astype(int)
results = pd.DataFrame({'id': test_ids[:25], 'label': test_predictions.reshape(-1)})
results.head()
1/1 [==============================] - 0s 472ms/step
| id | label | |
|---|---|---|
| 0 | 1 | 1 |
| 1 | 10 | 0 |
| 2 | 100 | 0 |
| 3 | 1000 | 1 |
| 4 | 10000 | 1 |
results.to_csv('test_pred.csv')
from keras.models import load_model
VGG16 = load_model("VGG16_fine_tuned.h5")
# Initialize arrays for storing actual and predicted labels
actual_labels = np.array([])
predicted_labels = np.array([])
# Extract labels and model predictions
for images, labels in validation_ds:
labels = labels.numpy().reshape(-1).astype(int)
actual_labels = np.concatenate([actual_labels, labels])
predictions = model_vgg.predict(images, verbose=False).reshape(-1)
binary_predictions = (predictions > 0.5).astype(int)
predicted_labels = np.concatenate([predicted_labels, binary_predictions])
# Classification report
print("Classification Report:")
print(classification_report(actual_labels, predicted_labels))
# Confusion matrix
conf_mat = confusion_matrix(actual_labels, predicted_labels)
plt.figure(figsize=(5,4))
sns.heatmap(conf_mat, annot=True, fmt='d', cmap='Blues', xticklabels=['Cats', 'Dogs'], yticklabels=['Cats', 'Dogs'])
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.show()
# Precision-recall curve
precision, recall, _ = precision_recall_curve(actual_labels, predicted_labels)
plt.plot(recall, precision, marker='.')
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.title('Precision-Recall Curve')
plt.show()
Classification Report:
precision recall f1-score support
0.0 0.98 0.97 0.97 1250
1.0 0.97 0.98 0.97 1250
accuracy 0.97 2500
macro avg 0.97 0.97 0.97 2500
weighted avg 0.97 0.97 0.97 2500
The final conclusion based on the provided classification report is that the model exhibits exceptional performance in classifying between the two classes (0 and 1). With high precision, recall, and F1-score for both classes, coupled with an overall accuracy of 97% on the validation dataset, it indicates robust and reliable predictive capabilities. This level of accuracy suggests that the model is well-suited for real-world applications requiring precise classification between the given classes.
While the model demonstrates overall high accuracy in classifying between cats and dogs, specific instances where it failed to predict correctly might include cases where images were ambiguous or contained multiple subjects (e.g., a dog with a cat in the background), or instances where the image quality was poor, making it challenging for the model to extract meaningful features for classification.